##### Helper functions #####

#Fills in NA values for Alexa data
fill_in_traffic <- function(gap){
  for (i in unique(sample_traffic_out$site)) {
    traffic_site <- dplyr::filter(sample_traffic_out,site == i)
    traffic_site <- dplyr::mutate(traffic_site,na_number = sum(is.na(page_views_per_million)))
    print(i)
    imputeTS::statsNA(traffic_site$page_views_per_million)
    
    traffic_site <- traffic_site %>% 
      mutate(reach_per_million_original = reach_per_million,
             
             page_views_per_million = imputeTS::na_interpolation(page_views_per_million,
                                                                 maxgap = gap),
             reach_per_million3 = imputeTS::na_interpolation(reach_per_million,
                                                             maxgap = 3),
             reach_per_million14 = imputeTS::na_interpolation(reach_per_million,
                                                              maxgap = 14),
             page_views_per_user = imputeTS::na_interpolation(page_views_per_user,
                                                              maxgap = gap),
             reach_per_million = imputeTS::na_interpolation(reach_per_million,
                                                            maxgap = gap))
    
    if (i == unique(sample_traffic_out$site)[1]) {
      sample_traffic_final_out <- traffic_site
    }
    else {
      sample_traffic_final_out <- dplyr::bind_rows(sample_traffic_final_out,traffic_site)
    }
  }
  return(sample_traffic_final_out)
}

#Get information on missing values for blocked outlet
get_nas_censored <- function(bw){
  sample_traffic_nas_close <- filter(sample_egypt_censored,before_after_time>=-bw & before_after_time<=bw) %>% 
    mutate(zeros=ifelse(reach_per_million==0,1,0)) %>% group_by(site) %>%
    summarize(zeros=sum(zeros),nas=sum(is.na(reach_per_million_original)),
              size=unique(size),blocking=unique(blocking))
  
  sample_traffic_nas_close <- sample_traffic_nas_close %>% mutate(nas=nas-zeros)
  
  #All
  zeros <- sum(sample_traffic_nas_close$zeros) /
    nrow(filter(sample_egypt_censored,before_after_time >= -bw & before_after_time <= bw))
  
  nas <- sum(sample_traffic_nas_close$nas) /
    nrow(filter(sample_egypt_censored,before_after_time >= -bw & before_after_time <= bw))
  
  
  #Above median
  zeros_above <- sum(filter(sample_traffic_nas_close, size == "Above median")$zeros) /
    nrow(filter(sample_egypt_censored,before_after_time >= -bw & before_after_time <= bw & size == "Above median"))
  
  nas_above <- sum(filter(sample_traffic_nas_close, size == "Above median")$nas) /
    nrow(filter(sample_egypt_censored,before_after_time >= -bw & before_after_time <= bw & size == "Above median"))
  
  #Below median
  zeros_below <- sum(filter(sample_traffic_nas_close, size == "Below median")$zeros) /
    nrow(filter(sample_egypt_censored,before_after_time >= -bw & before_after_time <= bw & size == "Below median"))
  
  nas_below <- sum(filter(sample_traffic_nas_close, size == "Below median")$nas) /
    nrow(filter(sample_egypt_censored,before_after_time >= -bw & before_after_time <= bw & size == "Below median"))
  return(cbind(nas,zeros,nas_above,zeros_above,nas_below,zeros_below))
}

#Function to plot traffic analysis (incl. robustness and heterogeneity analyses)
plot_traffic <- function(bw = NULL, stance_fun = NULL, size_fun = NULL,
                         outcome_fun = NULL,
                         dataset = NULL, donut = FALSE){
  #Function written based on Pan and Siegel (2020): How Saudi crackdowns fail to silence online dissent
  
  data_fun <- dataset
  data_fun <- data_fun %>% mutate(outcome = get(outcome_fun))
  data_fun <- data_fun %>% filter(before_after_time >= -bw & before_after_time <= bw) %>%
    filter(stance %in% as.character(stance_fun) & size %in% as.character(size_fun)) %>%
    group_by(before_after_time) %>% summarize(outcome = mean(outcome))
  
  data_fun <- data_fun %>% mutate(blocked = ifelse(before_after_time >= 0,1,0),
                                  blocked = as.factor(blocked))
  
  
  if (donut == TRUE) {
    data_fun <- filter(data_fun,!(before_after_time %in% seq(-5,5)))
  }
  
  #Leave out Traffic at censorship date for loess function (as not known when exactly censoring took place)
  highlight_df <- data_fun %>% 
    filter(before_after_time == 0)
  
  data_fun <- filter(data_fun,before_after_time != 0)
  
  average_change <- round((mean(filter(data_fun,before_after_time > 0)$outcome) - 
                             mean(filter(data_fun,before_after_time < 0)$outcome)) /
                            mean(filter(data_fun,before_after_time < 0)$outcome),2)*100
  
  descriptive_plot <-
    ggplot(data_fun, aes(before_after_time, outcome, group = blocked)) +  
    geom_point(colour = "gray40", size = 3) + 
    geom_smooth(method = "loess", se = F, colour = "black", fill = "black", span = .5, size = 2) +  
    geom_vline(xintercept = 0, linetype = 2) + 
    labs(y = "Average traffic per million", x = "Days pre-censorship and post-censorship") + 
    annotate("text", label = "Blocking date", x = 0, y = Inf, vjust = 2, hjust = 1, size = 13) +
    geom_point(data = highlight_df, 
               aes(before_after_time,outcome), 
               color = 'red',
               size = 3) +
    annotate("text", x =  Inf, y = Inf, hjust = 1,vjust = 2, label = paste0(average_change,"%"),
             size = 13) +
    theme_minimal( ) +
    theme(text = element_text(size = 30), panel.grid = element_blank(),
          panel.border = element_rect(colour = "black", fill = NA, size = 1),
          panel.spacing = unit(0, "in"),
          plot.margin = unit(c(5.5, 5.5, 5.5, 5.5), "points")) +
    ylim(0, max(data_fun$outcome)*1.1)
  
  if (donut == TRUE) {
    descriptive_plot <- descriptive_plot + geom_rect(xmin = -6, xmax = 6,
                                                     ymin = -Inf,ymax = Inf,
                                                     fill = "grey", alpha = 0.01)
  }
  
  return(list(descriptive_plot,average_change))
}

#Function to plot placebo traffic analysis (incl. robustness and heterogeneity analyses)
plot_traffic_placebo <- function(bw=NULL, stance_fun = NULL, size_fun = NULL,
                                 outcome_fun = NULL,
                                 dataset = NULL, nsims = 10000, donut = FALSE){
  #Function written based on Pan and Siegel (2020): How Saudi crackdowns fail to silence online dissent
  data_fun <- dataset
  data_fun <- data_fun %>% mutate(outcome = get(outcome_fun))
  data_fun <- data_fun %>% filter(before_after_time >= -bw & before_after_time <= bw) %>%
    filter(stance %in% as.character(stance_fun) & size %in% as.character(size_fun))
  
  #Get actual difference
  data_fun <- data_fun %>% mutate(blocked = ifelse(before_after_time >= 0,1,0),
                                  blocked = as.factor(blocked))
  pre_post <- lm(outcome~blocked, data = filter(data_fun,before_after_time != 0))
  
  set.seed(1234)
  nsims = nsims
  store_betas = data.frame(start = NA, coef = numeric(nsims))
  
  # random select dates across period before and after censorship
  minday = -bw
  maxday = bw
  days = seq(minday, maxday, by = 1)
  
  #Define period for later plot
  if (bw == 142) {
    period <- "(5 months)"
  }
  if (bw == 30) {
    period <- "(1 month)"
  }
  if (!(bw %in% c(30,142))) {
    period <- ""
  }
  
  
  # do the simulations
  for (i in 1:nsims) {
    # select treatment date to kick in
    start = sample(days, 1)
    data_fun$blocked2 <- ifelse(data_fun$before_after_time >= start,1,0)
    
    #add donut expression
    if (donut == TRUE) {
      reg = lm(outcome ~ blocked2, data = filter(data_fun,!(before_after_time %in% (start - seq(-5,5)))))
    }
    
    # use OLS regression to obtain mean difference in pre-post volume
    reg = lm(outcome ~ blocked2, data = filter(data_fun,before_after_time != start))
    
    store_betas$start[i] = start
    store_betas$coef[i] = coef(reg)[2]
  }
  
  # get observed difference
  actual_est = coef(pre_post)[2]
  
  # get p-value 
  store_betas <- na.omit(store_betas)
  pval = mean(store_betas$coef <= actual_est)
  
  #Plot placebo tests
  plot_placebo <-  ggplot(store_betas,aes(x = coef)) +
    geom_density(color = "black", fill = "gray") +
    theme_minimal() +
    theme(text = element_text(size = 30), panel.grid = element_blank(),
          panel.border = element_rect(colour = "black", fill = NA, size = 1), #.85
          panel.spacing = unit(0, "in"),
          plot.margin = unit(c(5.5, 5.5, 5.5, 5.5), "points")) +
    labs(x = paste("Difference in average traffic per million", period), y = "Density") +
    geom_vline(xintercept = actual_est, linetype = "dotted", size = 2) +
    geom_segment(aes(x = actual_est + 0.5, xend = actual_est + 0.1, y = max(density(store_betas$coef)$y) + 
                       max(density(store_betas$coef)$y)/5, yend = max(density(store_betas$coef)$y) + 
                       max(density(store_betas$coef)$y)/5), arrow = arrow(length = unit(3, "mm"))
    ) +
    annotate(geom = "text", x = actual_est + 0.7, y = max(density(store_betas$coef)$y) +
               max(density(store_betas$coef)$y)/5, label = "Observed difference", hjust = "left", size = 12) +
    annotate(geom = "text", x = actual_est + 0.7, y = max(density(store_betas$coef)$y) + 
               max(density(store_betas$coef)$y)/10, 
             label = paste0("(p-value=", round(pval, 2), ")"), hjust = "left", size = 12) +
    xlim(min(store_betas$coef) - 2,max(store_betas$coef) + 2) +
    ylim(0,max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/5)
  
  return(list(plot_placebo,store_betas,actual_est,pval))  
}

#Function to plot placebo traffic analysis for substitution analysis
substitute_placebo <- function(dataset = NULL, outcome_fun = NULL, nsims = 10000){
  #Function written based on Pan and Siegel (2020): How Saudi crackdowns fail to silence online dissent
  #Similar as the function above but excludes size variables
  data_fun <- dataset
  data_fun <- data_fun %>% mutate(outcome = get(outcome_fun))
  data_fun <- data_fun %>% filter(before_after_time >= -30 & before_after_time <= 30)
  
  #Get actual difference
  data_fun <- data_fun %>% mutate(blocked = ifelse(before_after_time >= 0,1,0),
                                  blocked = as.factor(blocked))
  pre_post <- lm(outcome~blocked, data = filter(data_fun,before_after_time != 0))
  
  set.seed(1234)
  nsims = nsims
  store_betas = data.frame(start = NA, coef = numeric(nsims))
  
  # random select dates across period before and after arrest
  minday = -30
  maxday = 30
  days = seq(minday, maxday, by = 1)
  
  
  # do the simulations
  for (i in 1:nsims) {
    # select treatment date to kick in
    start = sample(days, 1)
    data_fun$blocked2 <- ifelse(data_fun$before_after_time >= start,1,0)
    
    # use OLS regression to obtain mean difference in pre-post volume
    reg = lm(outcome ~ blocked2, data = filter(data_fun,before_after_time != start))
    
    store_betas$start[i] = start
    store_betas$coef[i] = coef(reg)[2]
  }
  
  # get observed difference
  actual_est = coef(pre_post)[2]
  
  # get one-sided p-value 
  store_betas <- na.omit(store_betas)
  
  if (actual_est > 0) {
    pval = mean(store_betas$coef >= actual_est)
  }
  if (actual_est < 0) {
    pval = mean(store_betas$coef <= actual_est)
  }
  
  #Assign outcome names
  if (outcome_fun == "reach_per_million") {
    outcome_name <- "Traffic per million"
  }
  
  #Plot placebo tests
  if (min(store_betas$coef) < 0) {
    plot_placebo <-  ggplot(store_betas,aes(x = coef)) +
      geom_density(color = "black", fill = "gray") +
      theme_minimal() +
      theme(text = element_text(size = 30), panel.grid = element_blank(),
            panel.border = element_rect(colour = "black", fill = NA, size = 1),
            panel.spacing = unit(0, "in"),
            plot.margin = unit(c(5.5, 5.5, 5.5, 5.5), "points")) +
      labs(x = paste("Difference in",outcome_name), y = "Density") +
      geom_vline(xintercept = actual_est, linetype = "dotted", size = 2) +
      geom_segment(aes(x = actual_est + min(store_betas$coef)/3.5, xend = actual_est + min(store_betas$coef)/30,
                       y = max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/5,
                       yend = max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/5),
                   arrow = arrow(length = unit(3, "mm"))
      ) +
      annotate(geom = "text", x = actual_est + min(store_betas$coef)/2.5, 
               y = max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/5,
               label = "Observed difference", hjust = "right", size = 12) +
      annotate(geom = "text", x = actual_est + min(store_betas$coef)/2.5, 
               y = max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/10, 
               label = paste0("(p-value=", round(pval, 2), ")"), hjust = "right", size = 12) +
      xlim(min(store_betas$coef) - 2,max(store_betas$coef) + 2) +
      ylim(0,max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/5)
  }
  if (min(store_betas$coef) > 0) {
    plot_placebo <-  ggplot(store_betas,aes(x = coef)) +
      geom_density(color = "black", fill = "gray") +
      theme_minimal() +
      theme(text = element_text(size = 30), panel.grid = element_blank(),
            panel.border = element_rect(colour = "black", fill = NA, size = 1),
            panel.spacing = unit(0, "in"),
            plot.margin = unit(c(5.5, 5.5, 5.5, 5.5), "points")) +
      labs(x = paste("Difference in",outcome_name), y = "Density") +
      geom_vline(xintercept = actual_est, linetype = "dotted", size = 2) +
      geom_segment(aes(x = actual_est - min(store_betas$coef)/3,
                       xend = actual_est - min(store_betas$coef)/30, 
                       y = max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/5, 
                       yend = max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/5), 
                   arrow = arrow(length = unit(3, "mm"))
      ) +
      annotate(geom = "text", x = actual_est - min(store_betas$coef)/2.5,
               y = max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/5, 
               label = "Observed difference", hjust = "right", size = 12) +
      annotate(geom = "text", x = actual_est - min(store_betas$coef)/2.5, 
               y = max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/10, 
               label = paste0("(p-value=", round(pval, 2), ")"), hjust = "right", size = 12) +
      xlim(min(store_betas$coef) - 2,max(store_betas$coef) + 2) +
      ylim(0,max(density(store_betas$coef)$y) + max(density(store_betas$coef)$y)/5)
  }
  
  return(list(plot_placebo,store_betas,actual_est,pval))
}



